***************
* Title: gambia_ecd_edcc_tableA3.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: ECD_3to6_Gambia_cleanv1.dta
* Outputs: gambia_ecd_edcc_tableA3.txt, gambia_ecd_edcc_tableA3[a-b].csv
* Notes: creates Table A3
****************
#delimit;
local start=`"$S_TIME"';
clear;
clear matrix;
clear mata;
graph drop _all;
cap log close;
set more off;
/*set directory:
	cd mydir
*/
local data=`"Data\cleaned"';
local output=`"analysis\output"';
*log using `output'\gambia_ecd_edcc_tableA3.txt, text replace;

* LOAD AND PREPARE DATA;
* define sample as in gambia_ecd_attrition1.do;
qui use `data'\ECD_3to6_Gambia_cleanv1, clear;
/*NEED TO CHECK TREATMENT STATUS OF SETTLEMENT 37028. DISREGARD FOR NOW*/
qui drop if settlement_code==37028;

* define 3 groups:
	--in baseline
	--in endline (original sample)
	--in endline (newly sampled);
qui gen in_baseline=(ip22!=3 & ip22!=.);	
qui gen in_endline=(interview_result==1);
qui gen in_endline_old=(in_endline==1 & in_baseline==1);
qui gen in_endline_new=(in_endline==1 & in_baseline==0);
/*treat unresolved gender mismatches as new to endline*/
qui replace in_endline_new=1 if in_endline_new==0 & child_gender_mismatch_resolved==0;
qui replace in_endline_old=. if in_endline_new==1;

* repeat for having valid MDAT fine motor and language/hearing scores;
/*note that "in sample" defined as having at least one valid MDAT score, not both as in previous analyses of attrition*/
qui gen in_baseline_mdat=(in_baseline==1 & (zfinemotor_baseline!=.|zlanghear_baseline!=.));
qui replace in_baseline_mdat=. if in_endline_new==1;
foreach x in endline endline_old endline_new {;
	qui gen in_`x'_mdat=(in_`x'==1 & (zfinemotor_endline!=.|zlanghear_endline!=.));
};
qui replace in_endline_old_mdat=. if in_endline_new==1;
qui gen in_endline_old_mdat_base=in_endline_old_mdat;
qui replace in_endline_old_mdat_base=. if in_baseline_mdat!=1; /*in baseline MDAT & in endline MDAT*/
	
/*keep eligibles with endline MDAT score only*/
* keep if baseline age from 3-6 years, or new to endline;
count; 
keep if (child_age_mths_dob>=36 & child_age_mths_dob<84 & in_baseline==1)|(child_age_mths_dob==. & in_baseline==0);

* keep if new to endline and would have been 3-6 at baseline (define as 4-8 at endline to allow errors);
keep if in_endline_new==0|(in_endline_new==1 & selected_child_age>=48 & selected_child_age<=96 & selected_child_age!=.)|
	(in_endline_new==1 & child_gender_mismatch_resolved==0);

qui drop if in_endline_new==1 & child_gender_mismatch_resolved==0; /*drop those with unresolved inconsistent gender histories, because all analysis here conditions on baseline characteristics*/
qui drop if in_endline_mdat==0; /*keep only those with endline MDAT score*/

* baseline data cleaning (follows gambia_ecd_balance3.do);
/*child mother tongue*/
qui gen child_mandinka=(child_mothertongue==1);
qui gen child_fula=(child_mothertongue==2);
qui gen child_jola=(child_mothertongue==4);
qui gen child_serahule=(child_mothertongue==5);
qui gen child_other=(child_mothertongue==3|child_mothertongue>=6); /*includes missing*/
foreach x in mandinka fula jola serahule other {;
	lab var child_`x' "child's mother tongue is `x'";
};

/*hours worked by HH head & mother*/
foreach x in headhh mother {;
	qui gen `x'_hrswrk_topcode=`x'_hrswrk;
	qui replace `x'_hrswrk_topcode=80 if `x'_hrswrk>80 & `x'_hrswrk!=.; /*top-code hours worked at 80*/
	qui tab `x'_hrswrk_cat, mi gen(`x'_hrswrk_cat);
};

/*winsorize annual per-capita expenditure below 1st and above 99th percentiles*/
foreach x in expend_yr_hhpc expend_yr_hhpc_usd {;
	winsor `x', gen(`x'_wins) p(0.01);
};
qui gen ecd_wtppctw_baseline=ecd_wtpq_baseline*12/expend_yr_hhpc_wins;
lab var expend_yr_hhpc_wins "Annual HH expenditure per capita, GMD, winsorized at 1/99 percentiles";
lab var expend_yr_hhpc_usd_wins "Annual HH expenditure per capita, USD, winsorized at 1/99 percentiles";
lab var ecd_wtppctw_baseline "Amount willing to pay for ECD as shr of monthly HH pc expenditure (winsorized) at baseline";

* endline data cleaning;
/*hours worked by HH head & mother*/
foreach x in headhh mother {;
	qui gen `x'_hrswrk_topcode_el=`x'_hrswrk_el;
	qui replace `x'_hrswrk_topcode_el=80 if `x'_hrswrk_el>80 & `x'_hrswrk_el!=.; /*top-code hours worked at 80*/
	qui tab `x'_hrswrk_cat_el, mi gen(`x'_hrswrk_cat_el);
};

/*winsorize annual per-capita expenditure below 1st and above 99th percentiles*/
foreach x in expend_yr_hhpc expend_yr_hhpc_usd {;
	winsor `x'_el, gen(`x'_wins_el) p(0.01);
};
lab var expend_yr_hhpc_wins_el "Annual HH expenditure per capita, GMD, winsorized at 1/99 percentiles, endline";
lab var expend_yr_hhpc_usd_wins_el "Annual HH expenditure per capita, USD, winsorized at 1/99 percentiles, endline";

* sample definitions;
qui gen communitybased=(treatment==6);
qui gen purecontrol=(treatment==1);
qui gen ECDAnnex_control=(treatment==4);
qui gen ECDAnnex_treated=(treatment==5);
qui gen ECDAnnex=ECDAnnex_control+ECDAnnex_treated;
qui gen communitybased_all=communitybased+purecontrol;
qui gen experiment=(ECDAnnex==1);
lab def experiment 0 "community-based" 1 "ECD Annex";
lab val experiment experiment;

* normalize endline MDAT scores to combined control group distribution;
/*adjust for age and gender, following normalization of gambia_ecd_clean1.do*/
drop zfinemotor_adj_endline zlanghear_adj_endline;
foreach x in finemotor langhear {;
	qui xi: reg `x'_endline selected_child_age selected_child_age2 child_female 
		if purecontrol==1|ECDAnnex_control==1;
	qui predict e, residuals; 
	qui su e if purecontrol==1|ECDAnnex_control==1;
	qui gen z`x'_adj_endline=(e-r(mean))/r(sd);
	drop e;
};
lab var zfinemotor_adj_endline "MDAT fine motor endline z-score (age-adjusted)";
lab var zlanghear_adj_endline "MDAT language & hearing endline z-score (age-adjusted)";

* sample sizes (# of children and project sites);
qui egen site=tag(settlement_code);
table treatment, c(freq rawsum site);
table treatment, c(rawsum in_baseline rawsum in_endline_old rawsum in_endline_new);

* PREPARE DATA FOR HETEROGENEITY ANALYSIS;
* categories for heterogeneity analysis;
/*simplify names of baseline variables to streamline variable creation*/
local X "ecd_attend ecd_wtppctw vaccinepct child_sick mother_health discsevere_vlnt";
foreach x in `X' {;
	qui gen `x'=`x'_baseline;
};
qui gen stimobjectspct=stimobjectspct_bl;
qui gen child_male=1-child_female;
qui gen child_notsick=1-child_sick;

/*quantiles of baseline HH expenditure, WTP for ECD, asset index (within ECD Annex sample!)*/
foreach x in expend_yr_hhpc_usd_wins ecd_wtppctw pc1 {;
	qui xtile `x'_med=`x', n(2);
	qui xtile `x'_qrt=`x', n(4);
	qui gen `x'_abvmed=(`x'_med==2);
	qui gen `x'_belmed=(`x'_med==1);
	qui tab `x'_qrt, gen(`x'_qrt); /*quartiles: 1=lowest, 4=highest*/
	lab var `x'_abvmed "`x' above median (full sample)";
	lab var `x'_belmed "`x' at or below median (full sample)";
	forval q=1/4 {;
		lab var `x'_qrt`q' "`x' quartile `q' (1=lowest, full sample)";
	};
};

/*baseline MDAT scores and height for age: +/- 0, 1 sd, 2 sd*/
foreach x in zfinemotor zlanghear zfinemotor_adj zlanghear_adj haz_dob {;
	foreach z in neg pos plus1sd minus1sd plus2sd minus2sd {;
		qui gen `x'_`z'=.;
	};
	qui replace `x'_pos=1 if `x'_baseline>0 & `x'_baseline!=.;
	qui replace `x'_pos=0 if `x'_baseline<=0 & `x'_baseline!=.;
	qui replace `x'_neg=1-`x'_pos;
	qui replace `x'_plus1sd=1 if `x'_baseline>1 & `x'_baseline!=.;
	qui replace `x'_plus1sd=0 if `x'_baseline<=1 & `x'_baseline!=.;
	qui replace `x'_minus1sd=1 if `x'_baseline<-1 & `x'_baseline!=.;
	qui replace `x'_minus1sd=0 if `x'_baseline>=-1 & `x'_baseline!=.;
	qui replace `x'_plus2sd=1 if `x'_baseline>2 & `x'_baseline!=.;
	qui replace `x'_plus2sd=0 if `x'_baseline<=2 & `x'_baseline!=.;
	qui replace `x'_minus2sd=1 if `x'_baseline<-2 & `x'_baseline!=.;
	qui replace `x'_minus2sd=0 if `x'_baseline>=-2 & `x'_baseline!=.;
	lab var `x'_pos "`x'_baseline>0";
	lab var `x'_plus1sd "`x'_baseline>=1";
	lab var `x'_minus1sd "`x'_baseline<=-1";
	lab var `x'_plus2sd "`x'_baseline>=2";
	lab var `x'_minus2sd "`x'_baseline<=-2";
};

/*indicators for above median*/
foreach x in vaccinepct mother_health stimobjectspct discsevere_vlnt {;
	qui xtile `x'_med=`x', n(2);
	qui gen `x'_abvmed=(`x'_med==2);
	qui gen `x'_belmed=(`x'_med==1);
	lab var `x'_abvmed "`x' above median (full sample)";
	lab var `x'_belmed "`x' at or below median (full sample)";
};

/*other*/
qui gen mother_attendedschl=(mother_schl_yrs>0 & mother_schl_yrs!=.);
qui gen mother_noschl=1-mother_attendedschl;
qui gen under5=(child_age_mths_dob<60);
lab var mother_attendedschl "mother attended some school";
lab var mother_attendedschl "mother didn't attend school";
lab var under5 "under 5 years old at baseline"; 

drop *_med *_qrt;

* interactions with treatment;
local X "child_female child_male region2 under5 ecd_attend 
	expend_yr_hhpc_usd_wins_abvmed ecd_wtppctw_abvmed pc1_belmed pc1_abvmed
	mother_noschl mother_attendedschl child_sick child_notsick 
	vaccinepct_abvmed mother_health_belmed mother_health_abvmed 
	stimobjectspct_belmed stimobjectspct_abvmed discsevere_vlnt_abvmed 
	mother_head_baseline polygamous";
foreach x in `X' {;
	if ("`x'"=="expend_yr_hhpc_usd_wins_abvmed") {;
		qui gen ECDA_expend_abvmed=ECDAnnex_treated*`x';
		qui gen cb_expend_abvmed=communitybased*`x';
	};
	else {;
		qui gen ECDA_`x'=ECDAnnex_treated*`x';
		qui gen cb_`x'=communitybased*`x';
	};
};
foreach x in zfinemotor zlanghear zfinemotor_adj zlanghear_adj haz_dob {;
	foreach z in neg pos plus1sd minus1sd plus2sd minus2sd {;
		qui gen ECDA_`x'_`z'=ECDAnnex_treated*`x'_`z';
		qui gen cb_`x'_`z'=communitybased*`x'_`z';
	};
};

qui gen haz_dob_endline=haz_endline; /*to make code work below*/
foreach d in ECDA cb {;
	foreach s in male female {;
		ren `d'_child_`s' `d'_`s';
	};
};

* ANALYSIS;
* outcomes: MDAT scores (adjusted);
* run interacted models with subgroup interactions, using SUR (suest);
* then test across models for subgroup effects for relatively 
	advantaged/disadvantaged groups;
/*Estimate 4 versions of model:
	1) control only for main effects of subgroup membership (all subgroup controls
		in each equation) [see gambia_ecd_results62.do]
	2) control only for main effect of subgroup whose treatment interaction is
		being tested [as in gambia_ecd_results27.do]
	3) control for subgroups + variables that are imbalanced at baseline
		[see gambia_ecd_results60.do]
	4) augment (1) to include baseline outcomes (and also test interactions with 
		initial scores) [see gambia_ecd_results62.do]
	
	Just use Model 1 here to focus on Table A3 in paper.*/
/*code missing controls as 0 and include dummy for missing in regressions*/
local X "ecd_attend_baseline headhh_ag pc1 ecd_wtppctw_baseline expend_yr_hhpc_usd_wins";
foreach x in `X' {;
	qui gen `x'_a=`x';
	qui replace `x'_a=0 if `x'==.;
	qui gen `x'_m=(`x'==.);
	lab var `x'_a "`x', replacing missing with 0";
	lab var `x'_m "`x' missing";
};

/*child sick: set as "not sick" if missing*/
qui replace child_sick=0 if child_sick==.;
qui replace child_notsick=1 if child_notsick==.;
foreach x in sick notsick {;
	qui replace ECDA_child_`x'=ECDAnnex_treated*child_`x' if ECDA_child_`x'==.;
	qui replace cb_child_`x'=communitybased*child_`x' if cb_child_`x'==.;
};

qui save `data'\ecdtemp, replace;

/*define outcomes*/
local Y "zfinemotor_adj zlanghear_adj";
local replace "replace";
local label "N R-squared lowSES highSES lowvhighSES";

*************************************;
* COMMUNITY-BASED ECD EXPERIMENT;
*************************************;
qui use `data'\ecdtemp, clear;
qui keep if purecontrol==1|communitybased==1;

* MULTIPLE HYPOTHESIS TESTS;
/*Proceed in 4 steps:
	1. Run analysis, using conventional p-values
	2. Collect all p-values
	3. Calculate sharpened q-values for each p-value
	4. Output sharpened q-values*/

* 1-2. ANALYSIS, INCLUDING GATHERING OF P-VALUES;
/*Gather all p-values from hypotheses tested. For each regression, this
	includes p-values for each treatment coefficient, plus p-values for each
	post-estimation hypothesis. */
	
* MODEL 1(a): main effects of subgroup membership (all subgroup controls in each equation);
/*subsample: children who DID NOT attend ECD at baseline*/
/*define sets of controls*/
local Xcommon "region2 child_male child_female pc1_belmed pc1_abvmed 
	mother_noschl mother_attendedschl child_notsick child_sick mother_health_belmed
	mother_health_abvmed stimobjectspct_belmed stimobjectspct_abvmed";
local X1 "cb_male cb_female `Xcommon'";
local X2 "cb_pc1_belmed cb_pc1_abvmed `Xcommon'";
local X3 "cb_mother_noschl cb_mother_attendedschl `Xcommon'";
local X4 "cb_child_notsick cb_child_sick `Xcommon'";
local X5 "cb_mother_health_belmed cb_mother_health_abvmed `Xcommon'";
local X6 "cb_stimobjectspct_belmed cb_stimobjectspct_abvmed `Xcommon'";

local m1_g1 "male";
local m1_g2 "female";
local m2_g1 "pc1_belmed";
local m2_g2 "pc1_abvmed";
local m3_g1 "mother_noschl";
local m3_g2 "mother_attendedschl";
local m4_g1 "child_notsick";
local m4_g2 "child_sick";
local m5_g1 "mother_health_belmed";
local m5_g2 "mother_health_abvmed";
local m6_g1 "stimobjectspct_belmed";
local m6_g2 "stimobjectspct_abvmed";

* estimate system;
/*Outcome 1: Fine motor skills*/
estimates clear;
forval m=1/6 {;
	qui reg zfinemotor_adj_endline `X`m'' if ecd_attend_baseline==0, nocons;
	estimates store m`m';
	
	/*gather p-values of main coefficients*/
	local g=1; /*initialize group index*/
	forval g=1/2 {;
		scalar define cbfm_m`m'_g`g'_b=_b[cb_`m`m'_g`g''];
		scalar define cbfm_m`m'_g`g'_se=_se[cb_`m`m'_g`g''];
		scalar define t=_b[cb_`m`m'_g`g'']/_se[cb_`m`m'_g`g''];
		scalar define cbfm_m`m'_g`g'_p=2*ttail(e(df_r),abs(t));
	};
};
suest m1 m2 m3 m4 m5 m6, cluster(settlement_code);

/*set up tests*/
local H1 "[m2_mean]cb_pc1_belmed [m3_mean]cb_mother_noschl 
	[m6_mean]cb_stimobjectspct_belmed";
local H2 "[m2_mean]cb_pc1_abvmed [m3_mean]cb_mother_attendedschl 
	[m6_mean]cb_stimobjectspct_abvmed";
local H3 "[m2_mean]cb_pc1_belmed + [m3_mean]cb_mother_noschl +
	[m6_mean]cb_stimobjectspct_belmed =
	[m2_mean]cb_pc1_abvmed + [m3_mean]cb_mother_attendedschl +
	[m6_mean]cb_stimobjectspct_abvmed";
/*Test 1: do low SES groups have non-zero treatment effects?*/
test `H1';
estadd scalar H1p=r(p): m?;
scalar define cbfm_H1p=r(p);
/*Test 2: do high SES groups have non-zero treatment effects?*/
test `H2';
estadd scalar H2p=r(p): m?;
scalar define cbfm_H2p=r(p);
/*Test 3: do low and high SES groups have significantly different treatment 
	effects from each other?*/
test `H3';
estadd scalar H3p=r(p): m?;
scalar define cbfm_H3p=r(p);

estout m? using `output'\gambia_ecd_edcc_tableA3a, 
	cells(b(star fmt(3)) se(par fmt(3))) starlevels(* 0.10 ** 0.05 *** .01)
	drop(`Xcommon') stats(N r2 H1p H2p H3p, labels(`label')) 
	title("Outcome: Fine motor skills") 
	note("Membership dummies for all subgroups included in all equations.")
	legend `replace';

/*Outcome 2: Language and hearing*/
estimates clear;
forval m=1/6 {;
	qui reg zlanghear_adj_endline `X`m'' if ecd_attend_baseline==0, nocons;
	estimates store m`m';
	
	/*gather p-values of main coefficients*/
	local g=1; /*initialize group index*/
	forval g=1/2 {;
		scalar define cblh_m`m'_g`g'_b=_b[cb_`m`m'_g`g''];
		scalar define cblh_m`m'_g`g'_se=_se[cb_`m`m'_g`g''];
		scalar define t=_b[cb_`m`m'_g`g'']/_se[cb_`m`m'_g`g''];
		scalar define cblh_m`m'_g`g'_p=2*ttail(e(df_r),abs(t));
	};	
};
suest m1 m2 m3 m4 m5 m6, cluster(settlement_code);

/*Test 1: do low SES groups have non-zero treatment effects?*/
test `H1';
estadd scalar H1p=r(p): m?;
scalar define cblh_H1p=r(p);
/*Test 2: do high SES groups have non-zero treatment effects?*/
test `H2';
estadd scalar H2p=r(p): m?;
scalar define cblh_H2p=r(p);
/*Test 3: do low and high SES groups have significantly different treatment 
	effects from each other?*/
test `H3';
estadd scalar H3p=r(p): m?;
scalar define cblh_H3p=r(p);

estout m? using `output'\gambia_ecd_edcc_tableA3a, 
	cells(b(star fmt(3)) se(par fmt(3))) starlevels(* 0.10 ** 0.05 *** .01)
	drop(`Xcommon') stats(N r2 H1p H2p H3p, labels(`label')) 
	title("Outcome: Language and hearing") 
	note("Membership dummies for all subgroups included in all equations.")
	legend append;
	
* 3. CALCULATE Q-VALUES FOR ALL HYPOTHESES;
/*Calculate sharpened q-values from Benjamini, Krieger, Yuketeli (2006).
	Use code from fdr_sharpened_qvalues.do from Anderson (2008), available at 
		https://are.berkeley.edu/~mlanderson/ARE_Website/Research.html*/
* prep data: put p-values from above into a dataset;
clear;
local M=6;			/*# columns/models*/
local G=2;			/*# rows/groups*/
local n=2*`M'*`G';	/*# hypotheses (coefficients)*/
qui set obs `n';
qui egen model=seq(), from(1) to(`M');
sort model;
qui egen group=seq(), from(1) to(`G');
sort model group;
qui egen finemotor=seq(), from(0) to(1);
sort finemotor model group;

/*add three observations for omnibus tests*/
qui set obs 30;
sort model group;
qui replace model=7 if model==.;
qui replace group=1 in 25;
qui replace group=2 in 26;
qui replace group=3 in 27;
qui replace group=1 in 28;
qui replace group=2 in 29;
qui replace group=3 in 30;
qui replace finemotor=0 in 25/27;
qui replace finemotor=1 in 28/30;
sort finemotor model group;

foreach x in b se pval {;
	qui gen double `x'=.;
};

/*fill in coeff, se where applicable & corresponding p-value*/
forval m=1/`M' {;
	foreach x in b se p {;
		forval g=1/`G' {;
			qui replace `x'=cbfm_m`m'_g`g'_`x' if model==`m' & group==`g' & finemotor==1;
			qui replace `x'=cblh_m`m'_g`g'_`x' if model==`m' & group==`g' & finemotor==0;
		};
	};
};

/*fill in p-value for omnibus tests*/
forval t=1/3 {;
	qui replace pval=cbfm_H`t'p if model==7 & group==`t' & finemotor==1;
	qui replace pval=cblh_H`t'p if model==7 & group==`t' & finemotor==0;
};

* START OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified);
* Collect the total number of p-values tested;
quietly sum pval;
local totalpvals = r(N);

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank;

quietly gen int original_sorting_order = _n;
quietly sort pval;
quietly gen int rank = _n if pval~=.;

* Generate the variable that will contain the BKY (2006) sharpened q-values;

qui gen bky06_qval = 1 if pval~=.;
	
* Set the initial counter to 1 ;

local qval = 1;

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, 
	then checks which hypotheses are rejected at q = 0.999, 
	then checks which hypotheses are rejected at q = 0.998, etc.  
	The loop ends by checking which hypotheses are rejected at q = 0.001.;
while `qval' > 0 {;
	* First Stage;
	* Generate the adjusted first stage q level we are testing: q' = q/1+q;
	local qval_adj = `qval'/(1+`qval');
	
	* Generate value q'*r/M;
	qui gen fdr_temp1 = `qval_adj'*rank/`totalpvals';
	
	* Generate binary variable checking condition p(r) <= q'*r/M;
	qui gen reject_temp1 = (fdr_temp1>=pval) if pval~=.;
	
	* Generate variable containing p-value ranks for all p-values that meet above condition;
	qui gen reject_rank1 = reject_temp1*rank;
	
	* Record the rank of the largest p-value that meets above condition;
	qui egen total_rejected1 = max(reject_rank1);

	* Second Stage;
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0);
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]));
	
	* Generate value q_2st*r/M;
	qui gen fdr_temp2 = `qval_2st'*rank/`totalpvals';
	
	* Generate binary variable checking condition p(r) <= q_2st*r/M;
	qui gen reject_temp2 = (fdr_temp2>=pval) if pval~=.;
	
	* Generate variable containing p-value ranks for all p-values that meet above condition;
	qui gen reject_rank2 = reject_temp2*rank;
	
	* Record the rank of the largest p-value that meets above condition;
	qui egen total_rejected2 = max(reject_rank2);

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition;
	qui replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.;
	
	* Reduce q by 0.001 and repeat loop;
	drop fdr_temp* reject_temp* reject_rank* total_rejected*;
	local qval = `qval' - .001;
};

quietly sort original_sorting_order;

******************************
* END OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified);
******************************

* 4. OUTPUT RESULTS;
* list results;
list model group finemotor b se pval bky06_qval;

* export as Excel file;
local X "model group finemotor b se pval bky06_qval";
qui export excel `X' using `output'\gambia_ecd_edcc_tableA3_cba_q.xlsx, 
	firstrow(variables) replace;	
	
* MODEL 1(b): main effects of subgroup membership (all subgroup controls in each equation);
qui use `data'\ecdtemp, clear;
qui keep if purecontrol==1|communitybased==1;

/*subsample: children who DID attend ECD at baseline*/
/*define sets of controls*/
local Xcommon "region2 child_male child_female pc1_belmed pc1_abvmed 
	mother_noschl mother_attendedschl child_notsick child_sick mother_health_belmed
	mother_health_abvmed stimobjectspct_belmed stimobjectspct_abvmed";
local X1 "cb_male cb_female `Xcommon'";
local X2 "cb_pc1_belmed cb_pc1_abvmed `Xcommon'";
local X3 "cb_mother_noschl cb_mother_attendedschl `Xcommon'";
local X4 "cb_child_notsick cb_child_sick `Xcommon'";
local X5 "cb_mother_health_belmed cb_mother_health_abvmed `Xcommon'";
local X6 "cb_stimobjectspct_belmed cb_stimobjectspct_abvmed `Xcommon'";

local m1_g1 "male";
local m1_g2 "female";
local m2_g1 "pc1_belmed";
local m2_g2 "pc1_abvmed";
local m3_g1 "mother_noschl";
local m3_g2 "mother_attendedschl";
local m4_g1 "child_notsick";
local m4_g2 "child_sick";
local m5_g1 "mother_health_belmed";
local m5_g2 "mother_health_abvmed";
local m6_g1 "stimobjectspct_belmed";
local m6_g2 "stimobjectspct_abvmed";

* estimate system;
/*Outcome 1: Fine motor skills*/
estimates clear;
forval m=1/6 {;
	qui reg zfinemotor_adj_endline `X`m'' if ecd_attend_baseline==1, nocons;
	estimates store m`m';
	
	/*gather p-values of main coefficients*/
	local g=1; /*initialize group index*/
	forval g=1/2 {;
		scalar define cbfm_m`m'_g`g'_b=_b[cb_`m`m'_g`g''];
		scalar define cbfm_m`m'_g`g'_se=_se[cb_`m`m'_g`g''];
		scalar define t=_b[cb_`m`m'_g`g'']/_se[cb_`m`m'_g`g''];
		scalar define cbfm_m`m'_g`g'_p=2*ttail(e(df_r),abs(t));
	};
};
suest m1 m2 m3 m4 m5 m6, cluster(settlement_code);

/*set up tests*/
local H1 "[m2_mean]cb_pc1_belmed [m3_mean]cb_mother_noschl 
	[m6_mean]cb_stimobjectspct_belmed";
local H2 "[m2_mean]cb_pc1_abvmed [m3_mean]cb_mother_attendedschl 
	[m6_mean]cb_stimobjectspct_abvmed";
local H3 "[m2_mean]cb_pc1_belmed + [m3_mean]cb_mother_noschl +
	[m6_mean]cb_stimobjectspct_belmed =
	[m2_mean]cb_pc1_abvmed + [m3_mean]cb_mother_attendedschl +
	[m6_mean]cb_stimobjectspct_abvmed";
/*Test 1: do low SES groups have non-zero treatment effects?*/
test `H1';
estadd scalar H1p=r(p): m?;
scalar define cbfm_H1p=r(p);
/*Test 2: do high SES groups have non-zero treatment effects?*/
test `H2';
estadd scalar H2p=r(p): m?;
scalar define cbfm_H2p=r(p);
/*Test 3: do low and high SES groups have significantly different treatment 
	effects from each other?*/
test `H3';
estadd scalar H3p=r(p): m?;
scalar define cbfm_H3p=r(p);

estout m? using `output'\gambia_ecd_edcc_tableA3b, 
	cells(b(star fmt(3)) se(par fmt(3))) starlevels(* 0.10 ** 0.05 *** .01)
	drop(`Xcommon') stats(N r2 H1p H2p H3p, labels(`label')) 
	title("Outcome: Fine motor skills") 
	note("Membership dummies for all subgroups included in all equations.")
	legend `replace';

/*Outcome 2: Language and hearing*/
estimates clear;
forval m=1/6 {;
	qui reg zlanghear_adj_endline `X`m'' if ecd_attend_baseline==1, nocons;
	estimates store m`m';
	
	/*gather p-values of main coefficients*/
	local g=1; /*initialize group index*/
	forval g=1/2 {;
		scalar define cblh_m`m'_g`g'_b=_b[cb_`m`m'_g`g''];
		scalar define cblh_m`m'_g`g'_se=_se[cb_`m`m'_g`g''];
		scalar define t=_b[cb_`m`m'_g`g'']/_se[cb_`m`m'_g`g''];
		scalar define cblh_m`m'_g`g'_p=2*ttail(e(df_r),abs(t));
	};	
};
suest m1 m2 m3 m4 m5 m6, cluster(settlement_code);

/*Test 1: do low SES groups have non-zero treatment effects?*/
test `H1';
estadd scalar H1p=r(p): m?;
scalar define cblh_H1p=r(p);
/*Test 2: do high SES groups have non-zero treatment effects?*/
test `H2';
estadd scalar H2p=r(p): m?;
scalar define cblh_H2p=r(p);
/*Test 3: do low and high SES groups have significantly different treatment 
	effects from each other?*/
test `H3';
estadd scalar H3p=r(p): m?;
scalar define cblh_H3p=r(p);

estout m? using `output'\gambia_ecd_edcc_tableA3b, 
	cells(b(star fmt(3)) se(par fmt(3))) starlevels(* 0.10 ** 0.05 *** .01)
	drop(`Xcommon') stats(N r2 H1p H2p H3p, labels(`label')) 
	title("Outcome: Language and hearing") 
	note("Membership dummies for all subgroups included in all equations.")
	legend append;	

* 3. CALCULATE Q-VALUES FOR ALL HYPOTHESES;
/*Calculate sharpened q-values from Benjamini, Krieger, Yuketeli (2006).
	Use code from fdr_sharpened_qvalues.do from Anderson (2008), available at 
		https://are.berkeley.edu/~mlanderson/ARE_Website/Research.html*/
* prep data: put p-values from above into a dataset;
clear;
local M=6;			/*# columns/models*/
local G=2;			/*# rows/groups*/
local n=2*`M'*`G';	/*# hypotheses (coefficients)*/
qui set obs `n';
qui egen model=seq(), from(1) to(`M');
sort model;
qui egen group=seq(), from(1) to(`G');
sort model group;
qui egen finemotor=seq(), from(0) to(1);
sort finemotor model group;

/*add three observations for omnibus tests*/
qui set obs 30;
sort model group;
qui replace model=7 if model==.;
qui replace group=1 in 25;
qui replace group=2 in 26;
qui replace group=3 in 27;
qui replace group=1 in 28;
qui replace group=2 in 29;
qui replace group=3 in 30;
qui replace finemotor=0 in 25/27;
qui replace finemotor=1 in 28/30;
sort finemotor model group;

foreach x in b se pval {;
	qui gen double `x'=.;
};

/*fill in coeff, se where applicable & corresponding p-value*/
forval m=1/`M' {;
	foreach x in b se p {;
		forval g=1/`G' {;
			qui replace `x'=cbfm_m`m'_g`g'_`x' if model==`m' & group==`g' & finemotor==1;
			qui replace `x'=cblh_m`m'_g`g'_`x' if model==`m' & group==`g' & finemotor==0;
		};
	};
};

/*fill in p-value for omnibus tests*/
forval t=1/3 {;
	qui replace pval=cbfm_H`t'p if model==7 & group==`t' & finemotor==1;
	qui replace pval=cblh_H`t'p if model==7 & group==`t' & finemotor==0;
};

* START OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified);
* Collect the total number of p-values tested;
quietly sum pval;
local totalpvals = r(N);

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank;

quietly gen int original_sorting_order = _n;
quietly sort pval;
quietly gen int rank = _n if pval~=.;

* Generate the variable that will contain the BKY (2006) sharpened q-values;

qui gen bky06_qval = 1 if pval~=.;
	
* Set the initial counter to 1 ;

local qval = 1;

* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, 
	then checks which hypotheses are rejected at q = 0.999, 
	then checks which hypotheses are rejected at q = 0.998, etc.  
	The loop ends by checking which hypotheses are rejected at q = 0.001.;
while `qval' > 0 {;
	* First Stage;
	* Generate the adjusted first stage q level we are testing: q' = q/1+q;
	local qval_adj = `qval'/(1+`qval');
	
	* Generate value q'*r/M;
	qui gen fdr_temp1 = `qval_adj'*rank/`totalpvals';
	
	* Generate binary variable checking condition p(r) <= q'*r/M;
	qui gen reject_temp1 = (fdr_temp1>=pval) if pval~=.;
	
	* Generate variable containing p-value ranks for all p-values that meet above condition;
	qui gen reject_rank1 = reject_temp1*rank;
	
	* Record the rank of the largest p-value that meets above condition;
	qui egen total_rejected1 = max(reject_rank1);

	* Second Stage;
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0);
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]));
	
	* Generate value q_2st*r/M;
	qui gen fdr_temp2 = `qval_2st'*rank/`totalpvals';
	
	* Generate binary variable checking condition p(r) <= q_2st*r/M;
	qui gen reject_temp2 = (fdr_temp2>=pval) if pval~=.;
	
	* Generate variable containing p-value ranks for all p-values that meet above condition;
	qui gen reject_rank2 = reject_temp2*rank;
	
	* Record the rank of the largest p-value that meets above condition;
	qui egen total_rejected2 = max(reject_rank2);

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition;
	qui replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.;
	
	* Reduce q by 0.001 and repeat loop;
	drop fdr_temp* reject_temp* reject_rank* total_rejected*;
	local qval = `qval' - .001;
};

quietly sort original_sorting_order;

******************************
* END OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified);
******************************

* 4. OUTPUT RESULTS;
* list results;
list model group finemotor b se pval bky06_qval;

* export as Excel file;
local X "model group finemotor b se pval bky06_qval";
qui export excel `X' using `output'\gambia_ecd_edcc_tableA3_cbb_q.xlsx, 
	firstrow(variables) replace;	
	
erase `data'\ecdtemp.dta;
local end=`"$S_TIME"'; 
di "`start'";
di "`end'";
log close;
